import plotly.express as px
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
# Load the EV model table (brand, model, performance figures, price) and
# preview its first seven rows.
ds1 = pd.read_csv(filepath_or_buffer='EV_Market.csv')
ds1.head(n=7)
| Brand | Model | AccelSec | TopSpeed_KmH | Range_Km | Efficiency_WhKm | FastCharge_KmH | RapidCharge | PowerTrain | PlugType | BodyStyle | Segment | Seats | PriceEuro | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Tesla | Model 3 Long Range Dual Motor | 4.6 | 233 | 450 | 161 | 940 | Yes | AWD | Type 2 CCS | Sedan | D | 5 | 55480 |
| 1 | Volkswagen | ID.3 Pure | 10.0 | 160 | 270 | 167 | 250 | No | RWD | Type 2 CCS | Hatchback | C | 5 | 30000 |
| 2 | Polestar | 2 | 4.7 | 210 | 400 | 181 | 620 | Yes | AWD | Type 2 CCS | Liftback | D | 5 | 56440 |
| 3 | BMW | iX3 | 6.8 | 180 | 360 | 206 | 560 | Yes | RWD | Type 2 CCS | SUV | D | 5 | 68040 |
| 4 | Honda | e | 9.5 | 145 | 170 | 168 | 190 | Yes | RWD | Type 2 CCS | Hatchback | B | 4 | 32997 |
| 5 | Lucid | Air | 2.8 | 250 | 610 | 180 | 620 | Yes | AWD | Type 2 CCS | Sedan | F | 5 | 105000 |
| 6 | Volkswagen | e-Golf | 9.6 | 150 | 190 | 168 | 220 | No | FWD | Type 2 CCS | Hatchback | C | 5 | 31900 |
# Load the per-state electric / non-electric vehicle totals and preview
# the first seven rows.
ds2 = pd.read_csv(filepath_or_buffer='EV_India.csv')
ds2.head(n=7)
| Sr. No. | State Name | Total Electric Vehicle | Total Non-Electric Vehicle | Total | |
|---|---|---|---|---|---|
| 0 | 1 | Andaman & Nicobar Island | 162 | 1,46,945 | 1,47,107 |
| 1 | 2 | Andra Pradesh | NaN | NaN | NaN |
| 2 | 3 | Arunachal Pradesh | 20 | 2,52,965 | 2,52,985 |
| 3 | 4 | Assam | 64766 | 46,77,053 | 47,41,819 |
| 4 | 5 | Bihar | 83335 | 1,04,07,078 | 1,04,90,413 |
| 5 | 6 | Chandigarh | 2812 | 7,46,881 | 7,49,693 |
| 6 | 7 | Chhattisgarh | 20966 | 68,36,200 | 68,57,166 |
# Load the periodic EV sales table (2 W / 3 W / 4 W / BUS / TOTAL columns)
# and preview the first seven rows.
ds3 = pd.read_csv(filepath_or_buffer='EV_Sales.csv')
ds3.head(n=7)
| YEAR | 2 W | 3 W | 4 W | BUS | TOTAL | |
|---|---|---|---|---|---|---|
| 0 | Apr-17 | 96 | 4748 | 198 | 0 | 5042 |
| 1 | May-17 | 91 | 6720 | 215 | 2 | 7028 |
| 2 | Jun-17 | 137 | 7178 | 149 | 1 | 7465 |
| 3 | Jul-17 | 116 | 8775 | 120 | 0 | 9011 |
| 4 | Aug-17 | 99 | 8905 | 137 | 0 | 9141 |
| 5 | Sep-17 | 109 | 7414 | 193 | 0 | 7716 |
| 6 | Oct-17 | 160 | 7250 | 214 | 0 | 7624 |
# Load the vehicle listing table (name, location, fuel type, mileage, ...)
# and preview the first seven rows.
ds4 = pd.read_csv(filepath_or_buffer='EV_FP.csv')
ds4.head(n=7)
| Name | Location | Year | Kilometers_Driven | Fuel_Type | Transmission | Owner_Type | Mileage | Engine | Power | Seats | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Maruti Alto K10 LXI CNG | Delhi | 2014 | 40929 | CNG | Manual | First | 32.26 km/kg | 998 CC | 58.2 bhp | 4.0 |
| 1 | Maruti Alto 800 2016-2019 LXI | Coimbatore | 2013 | 54493 | Petrol | Manual | Second | 24.7 kmpl | 796 CC | 47.3 bhp | 5.0 |
| 2 | Toyota Innova Crysta Touring Sport 2.4 MT | Mumbai | 2017 | 34000 | Diesel | Manual | First | 13.68 kmpl | 2393 CC | 147.8 bhp | 7.0 |
| 3 | Toyota Etios Liva GD | Hyderabad | 2012 | 139000 | Diesel | Manual | First | 23.59 kmpl | 1364 CC | null bhp | 5.0 |
| 4 | Hyundai i20 Magna | Mumbai | 2014 | 29000 | Petrol | Manual | First | 18.5 kmpl | 1197 CC | 82.85 bhp | 5.0 |
| 5 | Mahindra XUV500 W8 2WD | Coimbatore | 2016 | 85609 | Diesel | Manual | Second | 16.0 kmpl | 2179 CC | 140 bhp | 7.0 |
| 6 | Toyota Fortuner 4x2 AT TRD Sportivo | Pune | 2015 | 59000 | Diesel | Automatic | First | 12.55 kmpl | 2982 CC | 168.7 bhp | 7.0 |
# Report the (rows, columns) shape of each loaded dataset.
for frame_label, frame in (('ds1', ds1), ('ds2', ds2), ('ds3', ds3), ('ds4', ds4)):
    print(f'No.of rows and columns for {frame_label}: ', frame.shape)
No.of rows and columns for ds1: (103, 14) No.of rows and columns for ds2: (36, 5) No.of rows and columns for ds3: (74, 6) No.of rows and columns for ds4: (7253, 11)
# List the column index of each loaded dataset.
for frame_label, frame in (('ds1', ds1), ('ds2', ds2), ('ds3', ds3), ('ds4', ds4)):
    print(f"Column names of {frame_label}: ", frame.columns)
Column names of ds1: Index(['Brand', 'Model', 'AccelSec', 'TopSpeed_KmH', 'Range_Km',
'Efficiency_WhKm', 'FastCharge_KmH', 'RapidCharge', 'PowerTrain',
'PlugType', 'BodyStyle', 'Segment', 'Seats', 'PriceEuro'],
dtype='object')
Column names of ds2: Index(['Sr. No.', 'State Name', 'Total Electric Vehicle',
'Total Non-Electric Vehicle', 'Total'],
dtype='object')
Column names of ds3: Index(['YEAR', '2 W', '3 W', '4 W', 'BUS', 'TOTAL'], dtype='object')
Column names of ds4: Index(['Name', 'Location', 'Year', 'Kilometers_Driven', 'Fuel_Type',
'Transmission', 'Owner_Type', 'Mileage', 'Engine', 'Power', 'Seats'],
dtype='object')
import sweetviz as sv
import warnings
# Silence all warnings for the remainder of the session, then build the
# sweetviz profiling report for ds1 and write it out as HTML.
warnings.filterwarnings(action="ignore")
report = sv.analyze(source=ds1)
report.show_html(filepath="sweetviz_report.html")
Done! Use 'show' commands to display/save. |█████████████████████████████████████████| [100%] 00:02 -> (00:00 left)
Report sweetviz_report.html was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.
# Build the sweetviz profiling report for ds2 and write it out as HTML.
report2 = sv.analyze(source=ds2)
report2.show_html(filepath="sweetviz_report2.html")
Done! Use 'show' commands to display/save. |█████████████████████████████████████████| [100%] 00:01 -> (00:00 left)
Report sweetviz_report2.html was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.
# Build the sweetviz profiling report for ds3 and write it out as HTML.
report3 = sv.analyze(source=ds3)
report3.show_html(filepath="sweetviz_report3.html")
Done! Use 'show' commands to display/save. |█████████████████████████████████████████| [100%] 00:01 -> (00:00 left)
Report sweetviz_report3.html was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.
# Build the sweetviz profiling report for ds4 and write it out as HTML.
report4 = sv.analyze(source=ds4)
report4.show_html(filepath="sweetviz_report4.html")
Done! Use 'show' commands to display/save. |█████████████████████████████████████████| [100%] 00:01 -> (00:00 left)
Report sweetviz_report4.html was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.
# Horizontal bar chart of mean acceleration time (AccelSec) per brand.
plt.figure(figsize=(5,5))
# Sort the whole frame by brand instead of passing a separately sorted Series
# as `y`: a standalone sorted Series is either re-aligned on its index (the
# sort is lost) or used positionally (brands get paired with the wrong rows),
# depending on the seaborn version.
accel_by_brand = ds1.sort_values(by='Brand', ascending=True)
sns.barplot(data=accel_by_brand, x='AccelSec', y='Brand', palette="muted", ci=None)
# The x axis shows AccelSec, not a price.
plt.xlabel('Acceleration (sec)', family='serif', fontsize=15, labelpad=15)
plt.ylabel('Brands', family='serif', fontsize=15, labelpad=15)
plt.title(label="India's Electric Vehicle Acceleration", weight=200, family='sans-serif', size=15, pad=15)
plt.show()
# Donut chart of how many models fall in each market segment.
segment_counts = ds1['Segment'].value_counts()
x = segment_counts.plot.pie(radius=2, cmap='magma', startangle=0, textprops=dict(family='serif'), pctdistance=.5)
# A single white wedge drawn on top hollows the pie into a donut. Pass the
# colour as a one-element list: a bare string can be iterated character by
# character ('w', 'h', ...) by matplotlib's colour cycling, which only works
# by accident because 'w' happens to mean white.
plt.pie(x=[1], radius=1.2, colors=['white'])
plt.title(label='Electric Vehicles in India: Different Segments', family='sans-serif', size=15, pad=80)
# Drop the automatic y label that pandas adds to pie plots.
plt.ylabel('')
plt.show()
# Count plot: number of rows (models) per brand.
sns.catplot(kind='count', x='Brand', data=ds1, height=6, aspect=2, palette='dark')
# Keep the right and top spines so the plot is fully boxed.
sns.despine(top=False, right=False)
# Brand names are long; tilt them so they do not overlap.
plt.tick_params(axis='x', rotation=40)
plt.title("A brand's total number of EV models manufactured", family='sans-serif', size=19, pad=15)
plt.ylabel('Count', family='serif', size=19)
plt.xlabel('Brand',family='serif', size=15)
plt.show()
# Donut chart of how many models exist per body style.
body_style_counts = ds1['BodyStyle'].value_counts()
x = body_style_counts.plot.pie(radius=2, startangle=0, textprops=dict(family='serif'), cmap='inferno')
# A single white wedge drawn on top hollows the pie into a donut. Pass the
# colour as a one-element list: a bare string can be iterated character by
# character ('w', 'h', ...) by matplotlib's colour cycling, which only works
# by accident because 'w' happens to mean white.
plt.pie(x=[1], radius=1.2, colors=['white'])
plt.title(label='Body Styles of Electric Vehicles in India', family='sans-serif', size=15, pad=110)
# Drop the automatic y label that pandas adds to pie plots.
plt.ylabel('')
plt.show()
# Count plot: number of models for each seating capacity.
sns.countplot(x='Seats', data=ds1, palette='plasma')
plt.title(label='Electric Vehicles with Different Seating Capacity in India', family='sans-serif', size=15, pad=15)
plt.ylabel('Count', family='serif', size=12, labelpad=12)
plt.xlabel('Number of Seats', family='serif', size=12, labelpad=12)
plt.show()
# Bar plot of the mean seat count per brand (no error bars).
sns.catplot(kind='bar', data=ds1, x='Brand', y='Seats', palette='inferno', ci=None, height=6, aspect=2)
# Keep the right and top spines so the plot is fully boxed.
sns.despine(right=False, top=False)
# Brand names are long; tilt them so they do not overlap.
plt.tick_params(axis='x', rotation=40)
plt.xlabel('Brand',family='serif', size=16, labelpad=12)
plt.ylabel('Number of Seats', family='serif', size=16, labelpad=12)
plt.xticks(family='serif')
plt.yticks(family='serif')
plt.title('Brand-specific analysis of seat numbers', family='sans-serif', size=19, pad=15)
# Render explicitly, like the other plotting cells, so the cell does not end
# on the Text object returned by plt.title().
plt.show()
Text(0.5, 1.0, 'Brand-specific analysis of seat numbers')
# Horizontal bar chart of how many models use each plug type.
plug_type_counts = ds1['PlugType'].value_counts()
plug_type_counts = plug_type_counts.sort_values(ascending=False)
plug_type_counts.plot.barh()
plt.title('Plug Types for Electric Vehicles in India', family='sans-serif', size=15, pad=15)
plt.ylabel('Plug Type', family='serif', size=12, labelpad=14)
plt.xlabel('Count', family='serif', size=12, labelpad=14)
plt.show()
# Line plot of PriceEuro against the row index of ds1.
plt.plot(ds1['PriceEuro'], color='violet')
plt.xlabel('Number of Samples', family='serif', size=15, labelpad=12)
plt.ylabel('Price', family='serif', size=15, labelpad=12)
plt.title('Price Comparison', family='sans-serif', size=15, pad=12)
# Render explicitly, like the other plotting cells, instead of relying on a
# trailing semicolon (which only hides the repr in a notebook and does
# nothing when the code runs as a script).
plt.show()
# Horizontal bar chart of the mean top speed per brand (no error bars).
plt.figure(figsize=(8, 8))
sns.barplot(y='Brand', x='TopSpeed_KmH', data=ds1, palette='dark', ci=None)
plt.title(label='Brand-wise Speed Comparison of EVs in India', family='serif', size=15, pad=12)
plt.ylabel('Brand', family='serif', size=15)
plt.xlabel('Max Speed', family='serif', size=15)
plt.show()
# Bar plot of the mean driving range per brand (no error bars).
sns.catplot(kind='bar', data=ds1, x='Brand', y='Range_Km', palette='muted', ci=None, height=6, aspect=2)
# Keep the right and top spines so the plot is fully boxed.
sns.despine(right=False, top=False)
# Brand names are long; tilt them so they do not overlap.
plt.tick_params(axis='x', rotation=40)
plt.xlabel('Brand',family='serif', size=15, labelpad=12)
plt.ylabel('Range', family='serif', size=15, labelpad=12)
plt.title('Brand-wise Analysis of the Range Parameter', family='serif', size=19, pad=15)
# Render explicitly, like the other plotting cells, so the cell does not end
# on the Text object returned by plt.title().
plt.show()
Text(0.5, 1.0, 'Brand-wise Analysis of the Range Parameter')
# Grouped bar chart comparing electric and non-electric vehicle totals for a
# subset of states.
count_columns = ['Total Electric Vehicle', 'Total Non-Electric Vehicle']

# The totals are stored as text with Indian digit grouping (e.g. "1,46,945").
# Left as strings they are plotted as unordered categories rather than
# magnitudes, so strip the separators and convert to numbers first. Work on a
# copy so ds2 itself is left untouched; unparseable/missing cells become NaN.
ds2_numeric = ds2[['State Name'] + count_columns].copy()
for count_column in count_columns:
    ds2_numeric[count_column] = pd.to_numeric(
        ds2_numeric[count_column].astype(str).str.replace(',', '', regex=False),
        errors='coerce',
    )

# Long format: one row per (state, vehicle type) so seaborn can group by hue.
melt_ds2 = ds2_numeric.melt(id_vars='State Name', value_vars=count_columns, var_name='Vehicle Type', value_name='Count')
# Keep every 4th row to thin out the x axis.
reduced_ds2 = melt_ds2.iloc[::4]
plt.figure(figsize=(9,6))
# With numeric counts the y axis is already oriented correctly, so the
# previous invert_yaxis() workaround is no longer needed.
sns.barplot(x='State Name', y='Count', hue='Vehicle Type', data=reduced_ds2, palette='plasma')
plt.xlabel('State')
plt.ylabel('Count')
plt.title('Comparison of Electric and Non-Electric Vehicles by State (Downsampled)')
plt.legend(title='Vehicle Type')
plt.show()
# Two bar charts (electric, then non-electric totals) for every 4th state row.
# Copy the slice so the numeric conversion below does not write into ds2.
reduced_ds2 = ds2.iloc[::4].copy()

# The totals are stored as text with Indian digit grouping (e.g. "1,46,945").
# Left as strings they are plotted as unordered categories rather than
# magnitudes, so strip the separators and convert to numbers first;
# unparseable/missing cells become NaN.
for count_column in ('Total Electric Vehicle', 'Total Non-Electric Vehicle'):
    reduced_ds2[count_column] = pd.to_numeric(
        reduced_ds2[count_column].astype(str).str.replace(',', '', regex=False),
        errors='coerce',
    )

# With numeric counts the y axis is already oriented correctly, so the
# previous invert_yaxis() workaround is no longer needed in either chart.
plt.figure(figsize=(9, 6))
sns.barplot(x='State Name', y='Total Electric Vehicle', data=reduced_ds2, palette='viridis')
plt.xlabel('State')
plt.ylabel('Total Electric Vehicle')
plt.title('Total Electric Vehicle by State (Downsampled)')
plt.show()
plt.figure(figsize=(9,6))
sns.barplot(x='State Name', y='Total Non-Electric Vehicle', data=reduced_ds2, palette='dark')
plt.xlabel('State')
plt.ylabel('Total Non-Electric Vehicle')
plt.title('Total Non-Electric Vehicle by State (Downsampled)')
plt.show()
# Reload the sales table and draw one interactive line chart per vehicle
# category, each plotted against the 'YEAR' column.
ds3 = pd.read_csv('EV_Sales.csv')
import plotly.express as px
for sales_column in ('2 W', '3 W', '4 W', 'BUS'):
    fig = px.line(
        ds3,
        x='YEAR',
        y=sales_column,
        title='Year-wise Trend',
        labels={'YEAR': 'YEAR', sales_column: sales_column},
    )
    fig.show()
# Swarm plot of mileage per vehicle name for every 850th row of ds4.
# Copy the slice so the helper column below does not write into ds4.
reduced_ds4 = ds4.iloc[::850].copy()
# 'Mileage' holds text such as "32.26 km/kg" or "24.7 kmpl"; plotted as-is
# every distinct string becomes its own category instead of a position on a
# numeric axis. Keep the leading number in a separate column and plot that.
# NOTE(review): km/kg and kmpl values end up on the same axis — confirm that
# mixing the two units is acceptable for this chart.
reduced_ds4['Mileage_num'] = pd.to_numeric(
    reduced_ds4['Mileage'].astype(str).str.split().str[0],
    errors='coerce',
)
plt.figure(figsize=(10, 6))
sns.swarmplot(x='Mileage_num', y='Name', data=reduced_ds4, palette='inferno')
plt.xlabel('Mileage', fontsize=12)
plt.ylabel('Name of EV', fontsize=12)
plt.title('Mileage of EVs in India', fontsize=15)
plt.show()
# Encode the two categorical features as integers, standardise the numeric
# feature matrix and project it onto its principal components.

# Assign the result back instead of calling replace(..., inplace=True) on a
# column selection: under pandas copy-on-write that form modifies a temporary
# object and leaves ds1 unchanged. replace() leaves already-encoded values
# alone, so re-running the cell is safe; astype(int) gives a proper integer
# column and fails loudly if an unexpected label is present.
powertrain_codes = {'RWD': 0, 'FWD': 1, 'AWD': 2}
rapid_charge_codes = {'No': 0, 'Yes': 1}
ds1['PowerTrain'] = ds1['PowerTrain'].replace(powertrain_codes).astype(int)
ds1['RapidCharge'] = ds1['RapidCharge'].replace(rapid_charge_codes).astype(int)

X = ds1[['AccelSec','TopSpeed_KmH','Efficiency_WhKm','FastCharge_KmH', 'Range_Km', 'RapidCharge', 'Seats', 'PriceEuro','PowerTrain']]
# Standardise each feature so that no single scale dominates the PCA.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Nine components for nine input features: a rotation of the data, with no
# dimensionality reduction.
pca = PCA(n_components=9)
X_pca = pca.fit_transform(X_scaled)
# Name the columns PC1..PCn from the actual number of components.
df_pca = pd.DataFrame(X_pca, columns=[f'PC{k}' for k in range(1, X_pca.shape[1] + 1)])
df_pca.head(7)
| PC1 | PC2 | PC3 | PC4 | PC5 | PC6 | PC7 | PC8 | PC9 | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 2.429225 | -0.554599 | -1.147772 | -0.882791 | 0.839988 | -0.959297 | 0.998880 | 0.711148 | -0.396662 |
| 1 | -2.322483 | -0.345449 | 0.896473 | -1.305529 | 0.079598 | 0.235116 | -0.213678 | -0.544135 | -0.181867 |
| 2 | 1.587851 | 0.008899 | -0.650523 | 0.041024 | 0.593537 | -0.698248 | 0.058718 | 0.248837 | -0.202775 |
| 3 | 0.291018 | -0.000150 | -0.307702 | -0.514196 | -1.608861 | 0.291624 | 0.364999 | -0.235543 | 0.261663 |
| 4 | -2.602679 | -0.626489 | -0.888088 | 0.585294 | -0.802108 | 0.027387 | -0.084955 | -0.507790 | -0.049904 |
| 5 | 3.429398 | -0.673183 | -0.731118 | -0.463163 | 0.563761 | 0.323336 | -0.871201 | 0.039206 | -0.029359 |
| 6 | -2.232736 | -0.044259 | 1.091770 | -0.364093 | 0.989476 | -0.221604 | -0.070239 | -0.557169 | 0.030956 |
# Elbow method: fit K-Means for K = 1..10 on the PCA scores and record the
# within-cluster sum of squares (inertia) for each K.
wcss = []
for i in range(1, 11):
    kmean = KMeans(n_clusters=i, init='k-means++', random_state=90)
    kmean.fit(X_pca)
    wcss.append(kmean.inertia_)

# Plot WCSS against K; the bend ("elbow") suggests a reasonable cluster count.
plt.figure(figsize=(6,6))
plt.title('Plot of the Elbow Method', size=15, family='serif')
plt.plot(range(1, 11), wcss, color= "magenta")
plt.xticks(range(1, 11), family='serif')
plt.yticks(family='serif')
plt.xlabel('Number of Clusters (K)', family='serif')
plt.ylabel('WCSS', family='serif')
plt.grid()
plt.tick_params(axis='both', direction='inout', length=6, color='purple', grid_color='lightgray', grid_linestyle='--')
plt.show()
# Fit the final three-cluster K-Means model on the PCA scores.
kmean = KMeans(
    random_state=90,
    init='k-means++',
    n_clusters=3,
)
kmean.fit(X=X_pca)
KMeans(n_clusters=3, random_state=90)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
KMeans(n_clusters=3, random_state=90)
# Show the cluster label assigned to each row.
cluster_labels = kmean.labels_
print(cluster_labels)
[2 0 1 0 0 2 0 0 0 1 1 0 0 1 0 0 2 0 0 0 0 1 0 2 2 0 0 1 0 0 1 0 0 1 0 0 0 0 0 1 2 0 1 0 0 0 0 2 2 0 1 2 0 0 1 0 0 0 0 2 0 1 1 1 0 2 0 1 0 1 0 1 2 1 0 0 1 0 1 2 0 1 0 0 1 0 1 1 1 0 1 0 0 1 0 0 0 0 0 1 1 1 1]
# Number of rows assigned to each cluster.
pd.Series(data=kmean.labels_).value_counts()
0 58 1 32 2 13 Name: count, dtype: int64
# Attach the cluster labels to ds1 and plot the clusters, with their
# centroids, on two principal components.
ds1['clusters'] = kmean.labels_

# The model was fitted on X_pca, so cluster_centers_ has one column per
# principal component in the same order as df_pca's columns. Look up the
# centroid columns from the plotted component names so the stars land on the
# same axes as the points (previously the centroids always used columns 0 and
# 1, i.e. PC1/PC2, while the points were drawn on PC1/PC9).
x_component, y_component = 'PC1', 'PC9'
x_idx = df_pca.columns.get_loc(x_component)
y_idx = df_pca.columns.get_loc(y_component)

plt.figure(figsize=(8,6))
sns.scatterplot(data=df_pca, x=x_component, y=y_component, s=70, hue=kmean.labels_, palette='muted', zorder=2, alpha=.9)
plt.scatter(x=kmean.cluster_centers_[:, x_idx], y=kmean.cluster_centers_[:, y_idx], marker="*", c="black", s=80, label="centroids")
plt.xlabel(x_component, family='serif', size=12, labelpad=12)
plt.ylabel(y_component, family='serif', size=12,labelpad=12)
plt.grid()
plt.tick_params(grid_color='lightgray', grid_linestyle='--', zorder=1)
plt.legend(title='Labels', fancybox=True, shadow=True)
plt.title('K-Means Clustering', family='serif', size=15,pad=12)
plt.show()